library(timetk)
library(inspectdf)
library(janitor)
library(readr)
library(dplyr)
library(readr)
library(ggplot2)
library(naniar)
library(packcircles)
library(ggridges)
library(ggbeeswarm)
library(patchwork)
library(dplyr)
library(tidyr)
library(readr)
library(skimr)
library(purrr)
library(stringr)
library(urltools)
library(magrittr)
library(lubridate)
library(janitor)

Bevezetés

Az adatsor Tetouan City 2017-es áramfogyasztásának alakulását írja le 10 percenkénti bontásban 3 zónára.

Hivatkozás

Salam, A., & El Hibaoui, A. (2018, December). Comparison of Machine Learning Algorithms for the Power Consumption Prediction:-Case Study of Tetouan city–. In 2018 6th International Renewable and Sustainable Energy Conference (IRSEC) (pp. 1-5). IEEE

Változók

  • Date Time: 2017.01.01. 00:00 órától 10 percenként 2017.12.30. 24:00-ig.
  • Temperature: Hőmérséklet a városban
  • Humidity: Páratartalom
  • Wind Speed: szélsebesség
  • general diffuse flows: általános diffúz áramlás
  • diffuse flows: diffúz áramlás
  • power consumption of zone 1 of Tetouan city: 1. zóna fogyasztás
  • power consumption of zone 2 of Tetouan city: 2. zóna fogyasztás
  • power consumption of zone 3 of Tetouan city: 3. zóna fogyasztás

Cél

Az áramfogyasztás előrejelzése idősorelemzéssel.

Adatok betöltése

# Location of the raw CSV; kept in `url` so the source is visible at a glance.
url <-
  "https://raw.githubusercontent.com/fzksmrk/adatbanyaszati-beadando-2/main/data.csv"

# Read the CSV with an explicit type for every column. The column names are
# given verbatim (including the double spaces in the zone 2 / zone 3 names).
data <- url %>%
  readr::read_csv(
    col_types = readr::cols(
      DateTime = readr::col_datetime(format = "%m/%d/%Y %H:%M"),
      Temperature = readr::col_double(),
      Humidity = readr::col_double(),
      `Wind Speed` = readr::col_double(),
      `general diffuse flows` = readr::col_double(),
      `diffuse flows` = readr::col_double(),
      `Zone 1 Power Consumption` = readr::col_double(),
      `Zone 2  Power Consumption` = readr::col_double(),
      `Zone 3  Power Consumption` = readr::col_double()
    )
  )

# Print the raw import, then normalise the column names with janitor and
# shorten the three zone consumption columns to zone_1 .. zone_3.
data
data <- data %>%
  janitor::clean_names() %>%
  dplyr::rename(
    zone_1 = zone_1_power_consumption,
    zone_2 = zone_2_power_consumption,
    zone_3 = zone_3_power_consumption
  )
data

Új változók létrehozása

Idő bővítés

# Add the timetk time-series signature columns derived from `date_time`, then
# drop every column whose name matches the pattern below, plus `diff`.
data <- timetk::tk_augment_timeseries_signature(data, date_time) %>%
  dplyr::select(
    -dplyr::matches(
      "(half)|(wday)|(mday)|(qday)|(mday)|(yday)|(mweek)|(xts)|(second)|(minute)|(iso)|(num)|(hour12)|(am.pm)|(week\\d)|(mday7)"
    ),
    -diff
  )

Adatok vizsgálata

# Print inspectdf's per-column summary of the numeric variables.
data %>%
  inspectdf::inspect_num()

Általános változók

# One line chart per weather variable over time. Plots are bound with a
# leading `<-` (instead of a trailing `->`) so the name of each plot is visible
# where the expression starts. The x-axis title is removed on every panel.
p1 <- ggplot(data, aes(date_time, temperature)) +
  geom_line() +
  labs(x = NULL, y = "Temperature")

p2 <- ggplot(data, aes(date_time, humidity)) +
  geom_line() +
  labs(x = NULL, y = "Humidity")

p3 <- ggplot(data, aes(date_time, wind_speed)) +
  geom_line() +
  labs(x = NULL, y = "Wind Speed")

# Both "flows" columns go to long format so they share one panel, one colour
# per column.
p4 <- data %>%
  select(date_time, contains("flows")) %>%
  pivot_longer(-date_time) %>%
  ggplot(aes(date_time, value)) +
  geom_line(aes(color = name)) +
  labs(x = NULL, y = "Flows", color = "Flows")

# patchwork's `/` operator stacks the four panels vertically.
p1 / p2 / p3 / p4

Zónák energia felhasználása

# Power consumption of the three zones over time, one coloured line per zone.
# The y-axis labels use short-scale suffixes (K, M, ...).
# `scales::label_number_si()` is deprecated since scales 1.2.0; the documented
# replacement with the same output is
# `label_number(scale_cut = cut_short_scale())` (requires scales >= 1.2.0).
data %>%
  select(date_time, contains("zone")) %>%
  pivot_longer(-date_time) %>%
  ggplot(aes(date_time, value)) +
  geom_line(aes(color = name)) +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  ) +
  labs(
    x = NULL,
    y = "Power",
    color = "Zone"
  )

Decompose

#' Classical decomposition of a regularly sampled numeric series
#'
#' Wraps the values in a `ts` object with the given seasonal frequency and
#' passes it to `stats::decompose()` with its default settings.
#'
#' @param series Numeric vector of observations in time order.
#' @param freq Number of observations per seasonal cycle. The default of 144
#'   corresponds to one day of 10-minute observations (24 * 6).
#' @return The `decomposed.ts` object returned by `stats::decompose()`.
decompose_ts <- function(series, freq = 144) {
  stats::decompose(stats::ts(series, frequency = freq))
}
# Draw the decomposition plot for each zone and each weather variable, in the
# same order as before: zones first, then temperature, wind speed, humidity
# and the two flow columns.
purrr::walk(
  c(
    "zone_1",
    "zone_2",
    "zone_3",
    "temperature",
    "wind_speed",
    "humidity",
    "general_diffuse_flows",
    "diffuse_flows"
  ),
  function(column) plot(decompose_ts(data[[column]]))
)

Prediktív elemzés